import re


# Regular expressions that mark a piece of text as noise; a text matching any
# of them is rejected by ``useful``.  Presumably these target forum-page
# boilerplate (client signature, points, check-in, online status, rating,
# prestige) -- confirm against the caller.
trash_list = [
    r'来自.*客户端',
    r'^\|$',          # the text is exactly a single "|"
    r'积分',
    r'签到',
    r'当前[离在]线',  # was "[离|在]", which also matched a literal "|"
    r'评分',
    r'威望',
]


def useful(text):
    """Return True when *text* matches none of the patterns in ``trash_list``.

    Each pattern is applied with ``re.search``, so it may match anywhere in
    *text* unless the pattern itself is anchored.
    """
    return not any(re.search(pattern, text) for pattern in trash_list)

# Date-like patterns, tried from most to least specific so that the short
# "d-d" form only sees text that the longer forms did not already consume.
_TIME_PATTERNS = tuple(re.compile(p) for p in (
    r'\d{2,4}-\d{1,2}-\d{1,2}',
    r'\d{2,4}年\d{1,2}月\d{1,2}',
    r'\d{1,2}-\d{1,2}',
))


def find_time(text):
    """Yield every date-like substring found in *text*.

    The patterns are applied in order.  After each pattern has been searched,
    the spans it matched are removed from the working copy of *text* before
    the next pattern runs, so a shorter pattern cannot re-report a fragment of
    an already reported date.

    Yields:
        The matched substrings, grouped by pattern in the order above.
    """
    for pattern in _TIME_PATTERNS:
        matches = pattern.findall(text)
        # Remove exactly the matched spans.  Deleting by matched *value*
        # would also cut that text out of the middle of other matches and
        # could leave residue that a later pattern reports as a date.
        text = pattern.sub('', text)
        yield from matches
            
def get_n_list(text):
    """Return the lengths of all runs of consecutive ``<tag>`` markers in *text*.

    A run is one or more ``<tag>`` markers with nothing between them.  The
    result lists, in order of appearance, how many markers each run contains.
    A run at the very end of *text* is included.

    The scan is done with a regular expression, so stray '<', 't', 'a', 'g'
    or '>' characters elsewhere in the text cannot disturb the counting.
    """
    marker = '<tag>'
    return [
        len(run.group(0)) // len(marker)
        for run in re.finditer(r'(?:<tag>)+', text)
    ]


def partition(suffix_array, start, end):
    """Sort ``suffix_array[start]`` .. ``suffix_array[end]`` (inclusive) in place.

    Args:
        suffix_array: mutable list of mutually comparable items.
        start: index of the first element of the range to sort.
        end: index of the last element of the range to sort.

    Returns:
        None; the list is modified in place.  A range with fewer than two
        elements (``end <= start``) is left untouched.
    """
    if end <= start:
        return
    # Use the built-in sort: it orders the whole range correctly and does not
    # recurse, so long or already-sorted inputs cannot hit the recursion limit.
    suffix_array[start:end + 1] = sorted(suffix_array[start:end + 1])


def find_common_string(str1, str2):
    """Return ``(length, prefix)`` for the longest common prefix of two strings.

    When either argument is empty (or otherwise falsy) the result is
    ``(0, '')``.
    """
    if not (str1 and str2):
        return 0, ''
    shared = []
    # Walk both strings in lockstep and stop at the first differing character.
    for left, right in zip(str1, str2):
        if left != right:
            break
        shared.append(left)
    return len(shared), ''.join(shared)


def find_longest_repeating_strings(string):
    """Find the longest substring that occurs at least twice in *string*.

    Every suffix of *string* is collected and sorted.  A repeated substring
    is a common prefix of two suffixes, and the longest one shows up between
    two neighbours in sorted order, so only adjacent pairs are compared.

    Returns:
        ``(None, None)`` when *string* is empty, otherwise
        ``(length, substring)``.  The result is ``(0, '')`` when nothing
        repeats.  On ties the first candidate in sorted-suffix order wins.
    """
    if not string:
        return None, None
    # First, build all suffixes.  This holds O(n^2) characters in total.
    suffix_array = [string[i:] for i in range(len(string))]
    # Second, sort them.  The built-in sort guarantees a full lexicographic
    # order, which the adjacent-pair comparison below depends on.
    suffix_array.sort()
    # Third, take the longest common prefix over neighbouring suffixes.
    max_length, repeat_substring = 0, ''
    for current, following in zip(suffix_array, suffix_array[1:]):
        common_len, common_substring = find_common_string(current, following)
        if common_len > max_length:
            max_length, repeat_substring = common_len, common_substring
    return max_length, repeat_substring
